library(ggplot2)
library(devtools)
library(phyloseq)
library(picante)
library(tidyr)
library(forcats)
library(dplyr)
library(tibble)
library(cowplot)
library(picante) # Will also include ape and vegan 
library(car) # For residual analysis
library(sandwich) # for vcovHC function in post-hoc test
library(MASS) # For studres in plot_residuals function
library(boot) # For cross validation
source("code/Muskegon_functions.R")
source("code/set_colors.R")

Load data

# Loads a phyloseq object named otu_merged_musk_pruned)
load("data/surface_PAFL_otu_pruned_raw.RData")
# The name of the phyloseq object is: 
surface_PAFL_otu_pruned_raw 
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 7806 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 7806 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 7806 tips and 7804 internal nodes ]
# Remove doubletons!
surface_PAFL_otu_pruned_rm2 <- prune_taxa(taxa_sums(surface_PAFL_otu_pruned_raw) > 2, surface_PAFL_otu_pruned_raw) 
surface_PAFL_otu_pruned_rm2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2979 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 2979 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2979 tips and 2977 internal nodes ]
# Remove tree for computational efficiency 
surface_PAFL_otu_pruned_notree_rm2 <- phyloseq(tax_table(surface_PAFL_otu_pruned_rm2), otu_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))


# Gather the metadata in a dataframe to play with 
metadata <- data.frame(sample_data(surface_PAFL_otu_pruned_notree_rm2)) %>%
      mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
         lakesite = factor(lakesite,  levels = c("MOT", "MDP", "MBR", "MIN")),
         fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)

set.seed(777)

# Calculate the SOREN distance
soren_whole_dist <- ordinate(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  method = "PCoA",
  distance = "bray",
  binary = TRUE) # Make it presence/absence
 
p1 <- plot_ordination(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  ordination = soren_whole_dist,
  color = "fraction",
  shape = "lakesite",
  title = "Sørensen") +
  geom_point(size=5, alpha = 0.7, aes(fill =fraction,  color = fraction)) +
  scale_colour_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  scale_shape_manual(values = c(21, 22, 23, 24)) +
  theme(legend.position = "bottom")


# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  method = "PCoA",
  distance = "bray",
  binary = FALSE) # Make it Abundance weighted 
 
p2 <- plot_ordination(
  physeq = scaled_surface_PAFLA_pruned_rm2,
  ordination = bray_whole_dist ,
  color = "fraction",
  shape = "lakesite",
  title = "Bray-Curtis") +
  geom_point(size=5, alpha = 0.7, aes(fill =fraction,  color = fraction)) +
  scale_colour_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  scale_shape_manual(values = c(21, 22, 23, 24)) +
  theme(legend.position = "bottom")

plot_grid(p1, p2, labels = c("A", "B"), align = "h", ncol = 2)

Cell Count and Production Rates

######################################################### Fraction ABUNDANCe 
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
  group_by(fraction) %>%
  summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 × 2
##   fraction `mean(as.numeric(fraction_bac_abund))`
##     <fctr>                                  <dbl>
## 1 Particle                               41168.88
## 2     Free                              734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree

poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"), 
       aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
  ylab("Log10(Bacterial Cells/mL)") +
  ##### Particle vs free cell abundances 
  geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
  annotate("text", x=1.5, y=6.5, fontface = "bold",  size = 3.5, color = "gray40",
           label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
  theme(legend.position = "none",
        axis.title.x = element_blank())



######################################################### TOTAL PRODUCTION 
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
  group_by(fraction) %>%
  summarize(mean(frac_bacprod))
## # A tibble: 2 × 2
##   fraction `mean(frac_bacprod)`
##     <fctr>                <dbl>
## 1 Particle             9.958333
## 2     Free            24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(67,68,68,67)) # WholePart vs WholeFree


poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) + 
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  ylab("Heterotrophic Production (μgC/L/hr)") +
  ##### Particle vs free bulk production  
  geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
  annotate("text", x=1.5, y=68, fontface = "bold",  size = 3.5, color = "gray40",
           label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
  theme(legend.position = "none",
        axis.title.x = element_blank())



######################################################### TOTAL PRODUCTION 
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
  group_by(fraction) %>%
  summarize(mean(fracprod_per_cell))
## # A tibble: 2 × 2
##   fraction `mean(fracprod_per_cell)`
##     <fctr>                     <dbl>
## 1 Particle              4.816116e-07
## 2     Free              3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree


poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"), 
       aes(y = log10(fracprod_per_cell), x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  ylim(c(-8.5, -5)) + 
  ylab("log10(production/cell) (μgC/cell/hr)") +
  ##### Particle vs free per-cell production 
  geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
  annotate("text", x=1.5, y=-5, fontface = "bold",  size = 3.5, color = "gray40",
           label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
  theme(legend.position = "none",
        axis.title.x = element_blank())

plot_grid(poster_a, poster_b, poster_c,
          labels = c("A", "B", "C"),
          ncol = 3)

work_df <- metadata %>%
  dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
  mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
  dplyr::select(-norep_filter_name)

part_work_df <- work_df %>%
  filter(fraction == "Particle") %>%
  rename(part_bacabund = fraction_bac_abund,
         part_prod = frac_bacprod, 
         part_percell_prod = fracprod_per_cell_noinf) %>%
  dplyr::select(-fraction)

free_work_df <- work_df %>%
  filter(fraction == "Free") %>%
  rename(free_bacabund = fraction_bac_abund,
         free_prod = frac_bacprod, 
         free_percell_prod = fracprod_per_cell_noinf) %>%
  dplyr::select(-fraction)

byfrac_df <- part_work_df %>%
  left_join(free_work_df, by = "norep_water_name")

summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))
## 
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, 
##     norep_water_name != "MOTE515"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52690 -0.08048  0.05903  0.19565  0.35255 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)            2.0900     3.2247   0.648    0.533
## log10(free_bacabund)   0.4264     0.5532   0.771    0.461
## 
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared:  0.06194,    Adjusted R-squared:  -0.04229 
## F-statistic: 0.5943 on 1 and 9 DF,  p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"), 
       aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
  xlab("Free") +  ylab("Particle") + 
  ggtitle("Log10(cells/mL)") +
  geom_point(size = 3, shape = 21, fill = "grey") 


lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)

plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
  xlab("Free") +  ylab("Particle") + 
  ggtitle("Heterotrophic Production \n(μgC/L/hr)") +
  geom_point(size = 3, shape = 21, fill = "grey") +
  geom_smooth(method = "lm", color = "black")  + 
  annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
       label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n", 
                     "p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3))) 


lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)

plot3 <- ggplot(byfrac_df,
       aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +  
  xlab("Free") +  ylab("Particle") + 
  ggtitle("log10(production/cell) \n (μgC/cell/hr)") +
  geom_point(size = 3, shape = 21, fill = "grey") +
  geom_smooth(method = "lm", color = "black") + 
  annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
       label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n", 
                     "p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3))) 

plot_grid(plot1, plot2, plot3, 
          nrow = 1, ncol = 3, 
          labels = c("A", "B", "C"), 
          align = "h")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Calculate Diversity

# Set the seed for reproducibility
set.seed(777)

# Calculate the alpha diversity with 100 repsampling events
alpha_calc <- calc_alpha_diversity(physeq = surface_PAFL_otu_pruned_notree_rm2)

# What was the minimum sample size? 
min(sample_sums(surface_PAFL_otu_pruned_notree_rm2)) - 1
## [1] 6489
# Put it altogether in a dataframe 
otu_alphadiv <- calc_mean_alphadiv(physeq = surface_PAFL_otu_pruned_notree_rm2,
                           richness_df = alpha_calc$Richness, 
                           evenness_df = alpha_calc$Inverse_Simpson, 
                           shannon_df = alpha_calc$Shannon) %>%
    mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
         lakesite = factor(lakesite,  levels = c("MOT", "MDP", "MBR", "MIN")),
         measure = factor(measure, levels = c("Richness",  "Shannon_Entropy", "Inverse_Simpson", "Simpsons_Evenness")),
         fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))

Correlations between Diversity Measures

##########################################################################
###########################   CORRELATIONS   #############################
##########################################################################
# RICHNESS vs SHANNON
cor(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean) # YES
## [1] 0.9406491
# SHANNON VS INVERSE SIMPSON
cor(filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Inverse_Simpson" &  fraction == "Particle")$mean) # YES
## [1] 0.9651242
# INVERSE SIMPSON VS SIMPSONS EVENNESS
cor(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean) # YES
## [1] 0.9145095
# SIMPSONS EVENNESS VS RICHNESS
cor(filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean,
    filter(otu_alphadiv, measure == "Richness" &  fraction == "Particle")$mean) # YES
## [1] 0.6881205
ggplot(otu_alphadiv, aes(y = mean, x = fraction)) +
  ylab("Mean Diversity Value") +
  facet_wrap(~measure, scale = "free_y", ncol = 4) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), color = "black") +
  geom_boxplot(alpha = 0.5, outlier.shape = NA, color = "black", aes(fill = fraction)) +
  scale_fill_manual(values = fraction_colors) + 
  scale_color_manual(values = fraction_colors) + 
  theme(legend.position = "none", axis.title.x = element_blank()) 

#################################### Bulk Measure Production 
# Richness
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Richness")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Richness"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.057 -12.017  -5.654   9.256  46.605 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 18.501168   9.039579   2.047   0.0528 .
## mean        -0.003335   0.018911  -0.176   0.8616  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared:  0.001412,   Adjusted R-squared:  -0.04398 
## F-statistic: 0.0311 on 1 and 22 DF,  p-value: 0.8616
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Richness" & fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1271 -2.8865 -0.4649  3.7537  9.8401 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -11.269531   5.717450  -1.971  0.07701 . 
## mean          0.038125   0.009868   3.863  0.00314 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared:  0.5988, Adjusted R-squared:  0.5587 
## F-statistic: 14.93 on 1 and 10 DF,  p-value: 0.003143
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Richness" & fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.094 -11.112  -2.755   8.611  33.308 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.40793   21.62006   0.250    0.808
## mean         0.05511    0.06207   0.888    0.395
## 
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared:  0.07306,    Adjusted R-squared:  -0.01963 
## F-statistic: 0.7882 on 1 and 10 DF,  p-value: 0.3955
# Shannon
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Shannon_Entropy"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.659 -11.878  -5.666   9.231  46.593 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373   26.71652   0.623    0.540
## mean         0.08797    6.22969   0.014    0.989
## 
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared:  9.064e-06,  Adjusted R-squared:  -0.04545 
## F-statistic: 0.0001994 on 1 and 22 DF,  p-value: 0.9889
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Shannon_Entropy" & fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9022 -2.9150 -0.5875  1.6713 12.0544 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -40.320     14.007  -2.878  0.01643 * 
## mean          11.089      3.068   3.614  0.00473 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared:  0.5664, Adjusted R-squared:  0.5231 
## F-statistic: 13.06 on 1 and 10 DF,  p-value: 0.004734
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Shannon_Entropy" & fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.004 -10.708  -3.738   6.632  37.129 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -13.37      73.87  -0.181    0.860
## mean            9.40      18.50   0.508    0.622
## 
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared:  0.02516,    Adjusted R-squared:  -0.07233 
## F-statistic: 0.2581 on 1 and 10 DF,  p-value: 0.6225
# Inverse Simpson
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Inverse_Simpson"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.269 -10.298  -4.916   5.866  46.452 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  12.1577     6.0225   2.019   0.0559 .
## mean          0.1629     0.1731   0.941   0.3570  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared:  0.03868,    Adjusted R-squared:  -0.005019 
## F-statistic: 0.8851 on 1 and 22 DF,  p-value: 0.357
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Inverse_Simpson" & fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5589 -2.1093 -0.1969  0.8752  7.8282 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.45199    2.43866  -0.185  0.85666    
## mean         0.29344    0.05781   5.076  0.00048 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared:  0.7204, Adjusted R-squared:  0.6925 
## F-statistic: 25.77 on 1 and 10 DF,  p-value: 0.0004804
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Inverse_Simpson" & fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.765  -9.356  -4.445   6.116  36.017 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  11.0985    16.7052   0.664    0.521
## mean          0.5379     0.6598   0.815    0.434
## 
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared:  0.06233,    Adjusted R-squared:  -0.03143 
## F-statistic: 0.6648 on 1 and 10 DF,  p-value: 0.4339
# Simpson's Evenness 
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Simpsons_Evenness"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.000  -7.163  -3.815   3.392  45.845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -0.8524     9.2286  -0.092   0.9272  
## mean        274.1606   134.4253   2.040   0.0536 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1208 
## F-statistic:  4.16 on 1 and 22 DF,  p-value: 0.05359
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Simpsons_Evenness" & fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.973  -2.229  -1.086   1.356  12.380 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   -3.829      4.539  -0.844  0.41865   
## mean         234.057     71.238   3.286  0.00821 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared:  0.5191, Adjusted R-squared:  0.471 
## F-statistic: 10.79 on 1 and 10 DF,  p-value: 0.008211
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv, 
##     measure == "Simpsons_Evenness" & fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.389 -12.029  -3.306   4.668  39.946 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    15.85      23.52   0.674    0.516
## mean          114.96     321.02   0.358    0.728
## 
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared:  0.01266,    Adjusted R-squared:  -0.08607 
## F-statistic: 0.1282 on 1 and 10 DF,  p-value: 0.7277
#################################### Per-Cell Production 
# Richness
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Richness")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Richness"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8668 -0.2124  0.1045  0.2133  0.6473 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.4591277  0.2212633 -38.231  < 2e-16 ***
## mean         0.0028988  0.0004641   6.246  3.4e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3804 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6501, Adjusted R-squared:  0.6334 
## F-statistic: 39.02 on 1 and 21 DF,  p-value: 3.395e-06
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Richness" & fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55347 -0.21545 -0.01066  0.12536  0.58830 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.0617648  0.3717671 -21.685 4.44e-09 ***
## mean         0.0023794  0.0006348   3.748  0.00457 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3506 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6095, Adjusted R-squared:  0.5662 
## F-statistic: 14.05 on 1 and 9 DF,  p-value: 0.004567
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Richness" & fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71719 -0.13833  0.07155  0.23581  0.56949 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.135758   0.471253 -17.264 8.99e-09 ***
## mean         0.001657   0.001353   1.225    0.249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared:  0.1304, Adjusted R-squared:  0.04344 
## F-statistic: 1.499 on 1 and 10 DF,  p-value: 0.2488
# Shannon
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Shannon_Entropy"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03514 -0.15042 -0.03394  0.26568  0.82794 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.9036     0.7534 -14.472 2.14e-12 ***
## mean          0.8805     0.1763   4.993 6.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4348 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5428, Adjusted R-squared:  0.521 
## F-statistic: 24.93 on 1 and 21 DF,  p-value: 6.09e-05
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Shannon_Entropy" & fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36686 -0.23571 -0.01330  0.03961  0.70312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -9.8897     0.8820 -11.213 1.37e-06 ***
## mean          0.6993     0.1935   3.614  0.00562 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3584 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5921, Adjusted R-squared:  0.5468 
## F-statistic: 13.06 on 1 and 9 DF,  p-value: 0.00562
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Shannon_Entropy" & fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72444 -0.17785  0.08114  0.13946  0.67366 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.8666     1.6332  -5.429 0.000289 ***
## mean          0.3243     0.4091   0.793 0.446298    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared:  0.05914,    Adjusted R-squared:  -0.03495 
## F-statistic: 0.6285 on 1 and 10 DF,  p-value: 0.4463
# Inverse Simpson
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Inverse_Simpson"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97302 -0.28199 -0.05285  0.32003  0.99088 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.86039    0.18153 -43.301  < 2e-16 ***
## mean         0.02355    0.00525   4.485 0.000204 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4596 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4893, Adjusted R-squared:  0.4649 
## F-statistic: 20.12 on 1 and 21 DF,  p-value: 0.0002037
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Inverse_Simpson" & fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28651 -0.18384 -0.11125  0.07337  0.56444 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.360961   0.159490 -46.153 5.27e-12 ***
## mean         0.018087   0.003759   4.812 0.000958 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2969 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7201, Adjusted R-squared:  0.689 
## F-statistic: 23.15 on 1 and 9 DF,  p-value: 0.0009581
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Inverse_Simpson" & fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68269 -0.11512  0.01325  0.17742  0.63175 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.03513    0.35685 -22.517 6.72e-10 ***
## mean         0.01910    0.01409   1.355    0.205    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared:  0.1551, Adjusted R-squared:  0.07064 
## F-statistic: 1.836 on 1 and 10 DF,  p-value: 0.2052
# Simpson's Evenness 
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Simpsons_Evenness"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.06874 -0.41920  0.04553  0.34511  1.56565 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.4496     0.4116  -18.10 2.74e-14 ***
## mean          4.3470     6.0344    0.72    0.479    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6353 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02411,    Adjusted R-squared:  -0.02236 
## F-statistic: 0.5189 on 1 and 21 DF,  p-value: 0.4792
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Simpsons_Evenness" & fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4726 -0.2230 -0.1106  0.1248  0.6887 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.5941     0.2885 -26.324 7.96e-10 ***
## mean         15.1887     4.6341   3.278  0.00957 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3788 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5441, Adjusted R-squared:  0.4935 
## F-statistic: 10.74 on 1 and 9 DF,  p-value: 0.009566
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, 
##     measure == "Simpsons_Evenness" & fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63112 -0.24216  0.01388  0.14672  0.77426 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.9274     0.5202 -15.240    3e-08 ***
## mean          4.9361     7.1008   0.695    0.503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared:  0.04609,    Adjusted R-squared:  -0.0493 
## F-statistic: 0.4832 on 1 and 10 DF,  p-value: 0.5028
prodiv_plot1 <- ggplot(filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness")),
       aes(x = mean, y = frac_bacprod)) +
  ylab("Heterotrophic Production \n(ug C/L/hr)") +
  xlab("Mean Diversity Value") +
  facet_wrap(~measure, scale = "free_x", ncol = 4) +
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3, shape = 21, aes(fill = fraction), color = "black") +
  scale_fill_manual(values = fraction_colors) + 
  scale_color_manual(values = fraction_colors) + 
  theme(legend.position = "none", axis.title.x = element_blank()) +
  geom_smooth(method = "lm", 
              data = filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness") &
                              fraction == "Particle"), 
              color = "#FF6600", fill = "#FF6600", alpha = 0.3)

prodiv_plot2 <- ggplot(filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness")),
       aes(x = mean, y = log10(fracprod_per_cell_noinf))) +
  ylab("log10(production/cell) \n (ug C/cell/hr)") +
  xlab("Mean Diversity Value") +
  facet_wrap(~measure, scale = "free_x", ncol = 4) +
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_point(size = 3, shape = 21, aes(fill = fraction), color = "black") +
  scale_fill_manual(values = fraction_colors) + 
  scale_color_manual(values = fraction_colors) + 
  theme(legend.position = "none", axis.title.x = element_blank()) +
  geom_smooth(method = "lm", 
              data = filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness") &
                              fraction == "Particle"), 
              color = "#FF6600", fill = "#FF6600", alpha = 0.3)


plot_grid(prodiv_plot1, prodiv_plot2, 
          labels = c("A", "B"), 
          nrow = 2, ncol = 1)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).

lm_simpseven_bulkprod <- lm(frac_bacprod ~ mean, 
                            data = filter(otu_alphadiv, measure == "Simpsons_Evenness"))

ggplot(filter(otu_alphadiv, measure == "Simpsons_Evenness"), 
       aes(x = mean, y = frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  ylab("Heterotrophic Production \n(ug C/L/hr)") +
  xlab("Simpson's Evenness") + 
  geom_point(size = 3, shape = 21, aes(fill = fraction)) + 
  geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
  scale_fill_manual(values = fraction_colors) +
  scale_color_manual(values = fraction_colors) +
  theme(legend.position = c(0.15, 0.9), legend.title = element_blank()) +
  annotate("text", x = 0.115, y=55, color = "#424645", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_simpseven_bulkprod)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_simpseven_bulkprod)$coefficients[,4][2]), digits = 3))) 

######################################################### OBSERVED RICHNESS 
richness <- filter(otu_alphadiv, measure == "Richness")

rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
  group_by(fraction) %>%
  summarize(mean(mean))
## # A tibble: 2 × 2
##   fraction `mean(mean)`
##     <fctr>        <dbl>
## 1 Particle     556.7992
## 2     Free     338.4242
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree

rich_distribution_plot <- 
  ggplot(richness, aes(y = mean, x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  xlab("Observed Richness") +
  xlab("Fraction") + 
  ##### Particle vs free per-cell production 
  geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=790, fontface = "bold",  size = 4, color = "#424645",
           label= paste("***\np =", round(rich_fraction_wilcox$p.value, digits = 5))) +
  theme(legend.position = "none",# axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()




# Richness
summary(lm(frac_bacprod ~ mean, data = richness)) 
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.057 -12.017  -5.654   9.256  46.605 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 18.501168   9.039579   2.047   0.0528 .
## mean        -0.003335   0.018911  -0.176   0.8616  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared:  0.001412,   Adjusted R-squared:  -0.04398 
## F-statistic: 0.0311 on 1 and 22 DF,  p-value: 0.8616
lm_prod_vs_rich <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1271 -2.8865 -0.4649  3.7537  9.8401 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -11.269531   5.717450  -1.971  0.07701 . 
## mean          0.038125   0.009868   3.863  0.00314 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared:  0.5988, Adjusted R-squared:  0.5587 
## F-statistic: 14.93 on 1 and 10 DF,  p-value: 0.003143
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.094 -11.112  -2.755   8.611  33.308 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.40793   21.62006   0.250    0.808
## mean         0.05511    0.06207   0.888    0.395
## 
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared:  0.07306,    Adjusted R-squared:  -0.01963 
## F-statistic: 0.7882 on 1 and 10 DF,  p-value: 0.3955
# Plot 
prod_vs_rich_plot <-  
  ggplot(richness, aes(x=mean, y=frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("Heterotrophic Production \n (μgC/L/hr)") + 
  xlab("Observed Richness") +
  geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_rich)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_rich)$coefficients[,4][2]), digits = 3))) +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank()) 


# Richness vs per cell production 
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness)) 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8668 -0.2124  0.1045  0.2133  0.6473 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.4591277  0.2212633 -38.231  < 2e-16 ***
## mean         0.0028988  0.0004641   6.246  3.4e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3804 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6501, Adjusted R-squared:  0.6334 
## F-statistic: 39.02 on 1 and 21 DF,  p-value: 3.395e-06
lm_percell_prod_vs_rich <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55347 -0.21545 -0.01066  0.12536  0.58830 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.0617648  0.3717671 -21.685 4.44e-09 ***
## mean         0.0023794  0.0006348   3.748  0.00457 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3506 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6095, Adjusted R-squared:  0.5662 
## F-statistic: 14.05 on 1 and 9 DF,  p-value: 0.004567
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71719 -0.13833  0.07155  0.23581  0.56949 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.135758   0.471253 -17.264 8.99e-09 ***
## mean         0.001657   0.001353   1.225    0.249    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared:  0.1304, Adjusted R-squared:  0.04344 
## F-statistic: 1.499 on 1 and 10 DF,  p-value: 0.2488
# Plot 
percell_prod_vs_rich_plot <- 
  ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("log10(production/cell)\n (μgC/cell/hr)") +
  xlab("Observed Richness") +
  geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) + 
  #scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) + 
  annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_rich)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_rich)$coefficients[,4][2]), digits = 3))) + 
  #annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))



rich_plots <- plot_grid(rich_distribution_plot, prod_vs_rich_plot, percell_prod_vs_rich_plot,
          labels = c("A", "C", "E"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
rich_plots

######################################################### INVERSE SIMPSON
invsimps <- filter(otu_alphadiv, measure == "Inverse_Simpson")

invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
  group_by(fraction) %>%
  summarize(mean(mean))
## # A tibble: 2 × 2
##   fraction `mean(mean)`
##     <fctr>        <dbl>
## 1 Particle     35.47659
## 2     Free     24.09219
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree

invsimps_distribution_plot <- 
  ggplot(invsimps, aes(y = mean, x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  ylab("Inverse Simpson") +
  xlab("Fraction") + 
  ##### Particle vs free per-cell production 
  geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=80, fontface = "bold",  size = 4, color = "#424645", label= "NS") +
  theme(legend.position = "none", #axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()





# INVERSE SIMPSON VS BULK PRODUCTION
summary(lm(frac_bacprod ~ mean, data = invsimps)) 
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.269 -10.298  -4.916   5.866  46.452 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  12.1577     6.0225   2.019   0.0559 .
## mean          0.1629     0.1731   0.941   0.3570  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared:  0.03868,    Adjusted R-squared:  -0.005019 
## F-statistic: 0.8851 on 1 and 22 DF,  p-value: 0.357
lm_prod_vs_invsimps <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5589 -2.1093 -0.1969  0.8752  7.8282 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.45199    2.43866  -0.185  0.85666    
## mean         0.29344    0.05781   5.076  0.00048 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared:  0.7204, Adjusted R-squared:  0.6925 
## F-statistic: 25.77 on 1 and 10 DF,  p-value: 0.0004804
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.765  -9.356  -4.445   6.116  36.017 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  11.0985    16.7052   0.664    0.521
## mean          0.5379     0.6598   0.815    0.434
## 
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared:  0.06233,    Adjusted R-squared:  -0.03143 
## F-statistic: 0.6648 on 1 and 10 DF,  p-value: 0.4339
# Plot 
prod_vs_invsimps_plot <-  
  ggplot(invsimps, aes(x=mean, y=frac_bacprod)) + 
  geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) +  # Y-axis errorbars
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("Heterotrophic Production \n (μgC/L/hr)") + 
  xlab("Inverse Simpson") +
  geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
           label = paste("R2 =", round(summary(lm_prod_vs_invsimps)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_prod_vs_invsimps)$coefficients[,4][2]), digits = 4))) +
  theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank()) 



# Inverse Simpson vs per-cell production
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)) 
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97302 -0.28199 -0.05285  0.32003  0.99088 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.86039    0.18153 -43.301  < 2e-16 ***
## mean         0.02355    0.00525   4.485 0.000204 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4596 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4893, Adjusted R-squared:  0.4649 
## F-statistic: 20.12 on 1 and 21 DF,  p-value: 0.0002037
lm_percell_prod_vs_invsimps <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28651 -0.18384 -0.11125  0.07337  0.56444 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.360961   0.159490 -46.153 5.27e-12 ***
## mean         0.018087   0.003759   4.812 0.000958 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2969 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7201, Adjusted R-squared:  0.689 
## F-statistic: 23.15 on 1 and 9 DF,  p-value: 0.0009581
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68269 -0.11512  0.01325  0.17742  0.63175 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.03513    0.35685 -22.517 6.72e-10 ***
## mean         0.01910    0.01409   1.355    0.205    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared:  0.1551, Adjusted R-squared:  0.07064 
## F-statistic: 1.836 on 1 and 10 DF,  p-value: 0.2052
# Plot 
percell_prod_vs_invsimps_plot <- 
  ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) + 
  geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
  scale_color_manual(values = fraction_colors) +
  scale_fill_manual(values = fraction_colors) +
  ylab("log10(production/cell)\n (μgC/cell/hr)") +
  xlab("Inverse Simpson") +
  geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") + 
  scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) + 
  #scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) + 
  annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
           label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps)$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm_percell_prod_vs_invsimps)$coefficients[,4][2]), digits = 4))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))



invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
          labels = c("B", "D", "F"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
invsimps_plots

plot_grid(rich_plots, invsimps_plots,
          ncol = 2, nrow = 1,
          align = "h")

invsimps_plots_noyaxis <- 
  plot_grid(invsimps_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()), 
            prod_vs_invsimps_plot + theme(axis.title.y = element_blank()), 
            percell_prod_vs_invsimps_plot + theme(axis.title.y = element_blank()), 
          labels = c("B", "D", "F"), ncol = 1, nrow = 3,
          rel_heights = c(0.5, 0.8, 1.1),
          align = "v")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
plot_grid(rich_plots, invsimps_plots_noyaxis,
          ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
          align = "h")

Phylogenetic Diversity

Is there a relationship between randomized richness and unweighted SESMPD?

# Read in the tree
RAREFIED_rm2_fasttree <- read.tree(file = "data/PhyloTree/newick_tree_rm2_rmN.tre")
  
# Load in data that has doubletons removed 
load("data/PhyloTree/surface_PAFL_otu_pruned_RAREFIED_rm2.RData")
surface_PAFL_otu_pruned_RAREFIED_rm2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1891 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 70 sample variables ]
## tax_table()   Taxonomy Table:    [ 1891 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1891 tips and 1889 internal nodes ]
# Create the OTU table for picante 
surface_PAFL_RAREFIED_rm2_otu <- matrix(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2), nrow = nrow(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2)))
rownames(surface_PAFL_RAREFIED_rm2_otu) <- sample_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
colnames(surface_PAFL_RAREFIED_rm2_otu) <- taxa_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
    
  
## Calculate input for SES_MPD  
# Convert the abundance data to standardized abundanced vegan function `decostand' , NOTE: method = "total"
otu_decostand_total <- decostand(surface_PAFL_RAREFIED_rm2_otu, method = "total")
# check total abundance in each sample
apply(otu_decostand_total, 1, sum)
## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915 
##        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1        1
# check for mismatches/missing species between community data and phylo tree
RAREFIED_rm2_matches <- match.phylo.comm(RAREFIED_rm2_fasttree, otu_decostand_total)
# the resulting object is a list with $phy and $comm elements.  replace our
# original data with the sorted/matched data
phy_RAREFIED_rm2 <- RAREFIED_rm2_matches$phy
comm_RAREFIED_rm2 <- RAREFIED_rm2_matches$comm

# Calculate the phylogenetic distances
phy_dist_RAREFIED_rm2 <- cophenetic(phy_RAREFIED_rm2)

  
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap", 
                                     abundance.weighted = FALSE, runs = 999)

WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap", 
                                     abundance.weighted = TRUE, runs = 999)


# Gather div info
rich_df <- filter(otu_alphadiv, measure == "Richness") %>%
  dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)

invsimps_df <- filter(otu_alphadiv, measure == "Inverse_Simpson") %>%
  dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)


# Prepare to be merged with each other 
unweighted_df <- unweighted_sesMPD_indepswap_RAREFIED_rm2 %>%
  rownames_to_column("names") %>%
  mutate(phylo_measure = "Unweighted_SESMPD") %>%
  make_metadata_norep() %>%
  mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
  rename(norep_filter_name = names) %>%
  left_join(rich_df, by = "norep_filter_name") %>%
  mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
weighted_df <- WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 %>%
  rownames_to_column("names") %>%
  mutate(phylo_measure = "Weighted_SESMPD") %>%
  make_metadata_norep() %>%
  mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
  rename(norep_filter_name = names) %>%
  left_join(invsimps_df, by = "norep_filter_name") %>%
  mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")))
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))
## 
##  Kruskal-Wallis rank sum test
## 
## data:  mpd.obs.z by lakesite
## Kruskal-Wallis chi-squared = 5.8718, df = 3, p-value = 0.118
unweighted_df %>%
 filter(fraction == "Particle") %>%
  group_by(lakesite) %>%
  summarize(mean(mpd.obs.z))
## # A tibble: 4 × 2
##   lakesite `mean(mpd.obs.z)`
##     <fctr>             <dbl>
## 1      MOT         0.7949746
## 2      MDP        -0.9214585
## 3      MBR        -0.7819050
## 4      MIN        -1.0800320
# Lakesite 
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
       aes(y = mpd.obs.z, x = lakesite)) +
  ggtitle("Particle-Associated Samples Only") + 
  scale_fill_manual(values = lakesite_colors) +
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) + 
  geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
  scale_x_discrete(breaks=c("MOT", "MDP", "MBR", "MIN"),
                      labels=c("Outlet", "Deep", "Bear", "River")) + 
  theme(axis.title.x = element_blank(),
        legend.position = c(0.85, 0.9))

# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
       aes(y = mpd.obs.z, x = season)) +
  ggtitle("Particle-Associated Samples Only") + 
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = season_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) + 
  geom_boxplot(aes(fill = season), alpha = 0.5) + 
  theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
        legend.position = c(0.9, 0.9))

plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season, 
          labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")

######################################################### SES MPD DISTRIBUTION: UNWEIGHTED  
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
  group_by(fraction) %>%
  summarize(mean(mpd.obs.z))
## # A tibble: 2 × 2
##   fraction `mean(mpd.obs.z)`
##     <fctr>             <dbl>
## 1 Particle        -0.4971052
## 2     Free         0.4374514
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree


unweight_distribution_plot <- 
  ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  xlab("Fraction") + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
  ##### Particle vs free per-cell production 
  geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=1.35, fontface = "bold",  size = 4, color = "#424645",
           label= paste("***\np =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
  theme(legend.position = "none",# axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()


######################################################### SES MPD DISTRIBUTION: WEIGHTED 
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox
## 
##  Wilcoxon rank sum test
## 
## data:  mpd.obs.z by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
  group_by(fraction) %>%
  summarize(mean(mpd.obs.z))
## # A tibble: 2 × 2
##   fraction `mean(mpd.obs.z)`
##     <fctr>             <dbl>
## 1 Particle        -0.3607944
## 2     Free        -0.3854885
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree


weight_distribution_plot <- 
  ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
  #scale_color_manual(values = fraction_colors) + 
  scale_fill_manual(values = fraction_colors) +
  geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) + 
  geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
  scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  xlab("Fraction") + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
  ##### Particle vs free per-cell production 
  geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
  annotate("text", x=1.5, y=1.55, fontface = "bold",  size = 3.5, color = "#424645", label= "NS") +
  theme(legend.position = "none", #axis.title.y = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank()) +
  coord_flip()




# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -195.76  -96.00  -14.21   80.09  295.21 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   443.89      28.29  15.690 1.98e-13 ***
## mpd.obs.z    -124.90      34.37  -3.634  0.00147 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared:  0.3751, Adjusted R-squared:  0.3467 
## F-statistic: 13.21 on 1 and 22 DF,  p-value: 0.001466
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -191.77 -112.16  -41.89   55.88  278.91 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   515.16      51.16  10.069 1.49e-06 ***
## mpd.obs.z     -83.77      49.91  -1.678    0.124    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 155 on 10 degrees of freedom
## Multiple R-squared:  0.2198, Adjusted R-squared:  0.1417 
## F-statistic: 2.817 on 1 and 10 DF,  p-value: 0.1242
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -100.15  -66.22  -18.70   43.65  172.92 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  337.091     42.747   7.886 1.34e-05 ***
## mpd.obs.z      3.049     77.511   0.039    0.969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared:  0.0001547,  Adjusted R-squared:  -0.09983 
## F-statistic: 0.001547 on 1 and 10 DF,  p-value: 0.9694
unweight_rich_vs_mpd_plot <- 
  ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21, aes(fill = fraction)) + ylab("Observed  Richness") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) + 
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
           label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n", 
                         "p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
  theme(legend.title = element_blank(), legend.position = "none", 
        axis.text.x = element_blank(), axis.title.x = element_blank())



# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
  
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction == 
##     "Particle"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28.42 -19.15   0.40  12.95  40.67 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    23.58      12.61   1.870   0.0911 .
## mpd.obs.z     -32.98      29.43  -1.121   0.2886  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.57 on 10 degrees of freedom
## Multiple R-squared:  0.1116, Adjusted R-squared:  0.02276 
## F-statistic: 1.256 on 1 and 10 DF,  p-value: 0.2886
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction == 
##     "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0597  -4.3465  -0.8155   3.8832  18.4294 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   22.187      4.720   4.701  0.00084 ***
## mpd.obs.z     -4.942     10.486  -0.471  0.64753    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.441 on 10 degrees of freedom
## Multiple R-squared:  0.02173,    Adjusted R-squared:  -0.0761 
## F-statistic: 0.2221 on 1 and 10 DF,  p-value: 0.6475
weight_invsimps_vs_mpd_plot <- 
  ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + ylab("Inverse Simpson") +
  #xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  theme(legend.title = element_blank(), legend.position = "none",
        axis.text.x = element_blank(), axis.title.x = element_blank())




# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS 

summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.145 -4.312 -1.824  3.402 19.528 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    8.213      2.617   3.138   0.0105 *
## mpd.obs.z     -3.511      2.553  -1.375   0.1991  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.928 on 10 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.07494 
## F-statistic: 1.891 on 1 and 10 DF,  p-value: 0.1991
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.997 -14.937   2.190   8.751  37.007 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   18.194      8.397   2.167   0.0555 .
## mpd.obs.z     13.407     15.225   0.881   0.3992  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.71 on 10 degrees of freedom
## Multiple R-squared:  0.07196,    Adjusted R-squared:  -0.02084 
## F-statistic: 0.7754 on 1 and 10 DF,  p-value: 0.3992
unweight_prod_vs_mpd_plot <- 
  ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + 
  ylab("Heterotrophic Production \n (μgC/L/hr)") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  theme(legend.title = element_blank(), legend.position = "none",
        axis.text.x = element_blank(), axis.title.x = element_blank())


# Is there a relationship between Production and Weighted Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.030 -10.236  -2.842   5.937  38.409 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    9.008      5.664   1.590    0.126
## mpd.obs.z    -21.439     12.889  -1.663    0.110
## 
## Residual standard error: 14.66 on 22 degrees of freedom
## Multiple R-squared:  0.1117, Adjusted R-squared:  0.07133 
## F-statistic: 2.767 on 1 and 22 DF,  p-value: 0.1104
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.240  -4.575  -1.717   4.503  15.352 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    4.391      4.126   1.064    0.312
## mpd.obs.z    -15.432      9.627  -1.603    0.140
## 
## Residual standard error: 7.711 on 10 degrees of freedom
## Multiple R-squared:  0.2044, Adjusted R-squared:  0.1249 
## F-statistic: 2.569 on 1 and 10 DF,  p-value: 0.14
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.790 -12.275  -3.481   7.360  30.573 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   14.697      9.683   1.518    0.160
## mpd.obs.z    -24.285     21.512  -1.129    0.285
## 
## Residual standard error: 17.32 on 10 degrees of freedom
## Multiple R-squared:  0.113,  Adjusted R-squared:  0.02434 
## F-statistic: 1.274 on 1 and 10 DF,  p-value: 0.2853
weight_prod_vs_mpd_plot <- 
  ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
  geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + 
  ylab("Heterotrophic Production \n (μgC/L/hr)") +
  xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  theme(legend.title = element_blank(), legend.position = "none",
        axis.text.x = element_blank(), axis.title.x = element_blank())



# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72262 -0.28981  0.00389  0.12961  1.31931 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.1988     0.0999 -72.063  < 2e-16 ***
## mpd.obs.z    -0.4973     0.1205  -4.128 0.000479 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4778 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4479, Adjusted R-squared:  0.4216 
## F-statistic: 17.04 on 1 and 21 DF,  p-value: 0.0004788
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37955 -0.24192 -0.16507  0.04426  1.18158 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.9247     0.1711 -40.472 1.71e-11 ***
## mpd.obs.z    -0.3299     0.1627  -2.027   0.0732 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4649 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3135, Adjusted R-squared:  0.2372 
## F-statistic:  4.11 on 1 and 9 DF,  p-value: 0.07324
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6398 -0.2978  0.0770  0.1760  0.7651 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.55616    0.19601 -38.550 3.29e-12 ***
## mpd.obs.z   -0.04317    0.35540  -0.121    0.906    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4135 on 10 degrees of freedom
## Multiple R-squared:  0.001473,   Adjusted R-squared:  -0.09838 
## F-statistic: 0.01475 on 1 and 10 DF,  p-value: 0.9057
unweight_percell_vs_mpd_plot <- 
  ggplot(unweighted_df, 
       aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21, aes(fill = fraction)) + 
  ylab("log10(Production/cell)\n (μgC/cell/hr)") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
  #stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
  scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) + 
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
  annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
         label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n", 
                       "p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))


# Is there a relationship between PER-CELL PRODUCTION and Weighted Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94765 -0.40616 -0.03619  0.31885  1.39101 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.5352     0.2442 -30.860   <2e-16 ***
## mpd.obs.z    -0.9519     0.5446  -1.748   0.0951 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6009 on 21 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.127,  Adjusted R-squared:  0.08544 
## F-statistic: 3.055 on 1 and 21 DF,  p-value: 0.09508
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Particle"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65329 -0.32632 -0.03486  0.22224  0.95307 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.0845     0.3015 -23.496 2.18e-09 ***
## mpd.obs.z    -0.9337     0.6753  -1.383      0.2    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5096 on 9 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1752, Adjusted R-squared:  0.08357 
## F-statistic: 1.912 on 1 and 9 DF,  p-value: 0.2001
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)
## 
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, 
##     fraction == "Free"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54189 -0.16159 -0.00204  0.20070  0.44618 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.9519     0.1848 -43.019 1.11e-12 ***
## mpd.obs.z    -0.9777     0.4107  -2.381   0.0386 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3306 on 10 degrees of freedom
## Multiple R-squared:  0.3617, Adjusted R-squared:  0.2979 
## F-statistic: 5.667 on 1 and 10 DF,  p-value: 0.03857
weight_percell_vs_mpd_plot <- 
  ggplot(weighted_df, 
       aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + 
  ylab("log10(Production/cell)\n (μgC/cell/hr)") +
  xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
  scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) + 
    scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) + 
  annotate("text", x = 0.3, y=-7.9, color = "skyblue", fontface = "bold",
     label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n", 
                   "p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
  theme(legend.title = element_blank(), legend.position ="bottom", 
        legend.text = element_text(size = 14))



plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot, unweight_percell_vs_mpd_plot, 
          labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
          rel_heights = c(0.5, 0.8, 0.8, 1.2),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot, 
          labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
          rel_heights = c(0.5, 0.8, 0.8, 1.2),
          align = "v")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>

Randomized Richness

#########################################################  Subset only richness data 
### These are the richness values for the fake samples 
#rich_stats <- filter(otu_alphadiv, measure == "Richness") %>%
#  dplyr::select(1:2) %>%
#  rename(mean_richness = mean) %>%
#  mutate(sample = paste("Sample_", seq(1:nrow(filter(otu_alphadiv, measure == "Richness"))), sep = ""),
#         mean_richness = matround(mean_richness))

## Pick OTUs to match these richness values 

  # List the otus from ALL samples 
#  all_otus <- taxa_names(surface_PAFL_otu_pruned_rm2)
  
  # Obtain the OTU table from the phyloseq object
#  otutab <- otu_table(surface_PAFL_otu_pruned_rm2)
  # Make all the counts to be 0
#  otutab_newvals <- apply(otutab, c(1, 2), function(x) 0)

  # Stop if things are wrong 
#  stopifnot(colnames(otutab_newvals) == all_otus)                       # Do the OTU names match?
#  stopifnot(rownames(otutab_newvals) == rich_stats$norep_filter_name)   # Do the sample names match?
  
# Make it reproducible!   
#set.seed(777)
  
#for (row in 1:nrow(rich_stats)) {
  
  # Pick the richness value 
#  rich_val <- rich_stats[row, 2]  
  
  # Sample the OTUs to represent that richness value 
#  col_index <- sample(ncol(otutab_newvals), rich_val, replace = FALSE, prob = NULL)
  
  # make all other columns 0
#  otutab_newvals[row, col_index] <- 1

#}


## Calculate the tree for those randomized samples 
# create a new phyloseq object 
#random_physeq_presab_raw <- phyloseq(otu_table(otutab_newvals, taxa_are_rows = FALSE), 
#                                 tax_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))
#random_physeq_presab_raw

# Remove taxa that are 0!
#random_physeq_presab_pruned <- prune_taxa(taxa_sums(random_physeq_presab_raw) > 0, random_physeq_presab_raw) 
#random_physeq_presab_pruned

# Calculate tree 
# Obtain the OTU names that were retained in the dataset
#tax <- data.frame(tax_table(random_physeq_presab_pruned))
#vector_of_otus <- as.vector(tax$OTU)

# Write out the file for processing/fasttree
# write(vector_of_otus, file = "data/PhyloTree/randomized/random_physeq_presab_pruned_OTUnames.txt", ncolumns = 1, append = FALSE, sep = "\n")

Is there a relationship between randomized richness and unweighted SESMPD?

# Read in the tree
randomized_tree <- read.tree(file = "data/PhyloTree/randomized/newick_tree_randomized_rmN.tre")
  
  #random_physeq_presab_pruned_tree <- merge_phyloseq(random_physeq_presab_pruned, randomized_tree)
  #random_physeq_presab_pruned_tree
#save(list="random_physeq_presab_pruned_tree", file=paste0("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")) 

load("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")
random_physeq_presab_pruned_tree
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2911 taxa and 24 samples ]
## sample_data() Sample Data:       [ 24 samples by 44 sample variables ]
## tax_table()   Taxonomy Table:    [ 2911 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2911 tips and 2909 internal nodes ]
# First force the OTU 
randomized_otu <- matrix(otu_table(random_physeq_presab_pruned_tree), nrow = nrow(otu_table(random_physeq_presab_pruned_tree)))
rownames(randomized_otu) <- sample_names(random_physeq_presab_pruned_tree)
colnames(randomized_otu) <- taxa_names(random_physeq_presab_pruned_tree)
    
  
## Calculate input for SES_MPD  
# Convert the presence/absence data to standardized abundanced  vegan function `decostand' , NOTE: method = "pa"
otu_decostand <- decostand(randomized_otu, method = "pa")
# check total abundance in each sample
apply(otu_decostand, 1, sum)
## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915 
##      906      574      434      268      256      358      493      447      476      276      284      381      840      632      586      452      383      511      505      343      444      274      238      381
# check for mismatches/missing species between community data and phylo tree
randomized_matches <- match.phylo.comm(randomized_tree, otu_decostand)
# the resulting object is a list with $phy and $comm elements.  replace our
# original data with the sorted/matched data
phy_randomized_rm2 <- randomized_matches$phy
comm_randomized_rm2 <- randomized_matches$comm

# Calculate the phylogenetic distances
phy_dist_randomized_rm2 <- cophenetic(phy_randomized_rm2)

  
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_randomized <- ses.mpd(comm_randomized_rm2, phy_dist_randomized_rm2, null.model = "independentswap", 
                                     abundance.weighted = FALSE, runs = 999)

df <- unweighted_sesMPD_indepswap_randomized

df$names <- row.names(df)
df_2 <- make_metadata_norep(df) %>%
  mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
  
  
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(ntaxa ~ mpd.obs.z, data = df_2))
## 
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = df_2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -212.74 -129.44  -12.54   53.58  468.91 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   448.27      35.48  12.633 1.47e-11 ***
## mpd.obs.z      56.32      94.24   0.598    0.556    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 173.7 on 22 degrees of freedom
## Multiple R-squared:  0.01598,    Adjusted R-squared:  -0.02875 
## F-statistic: 0.3572 on 1 and 22 DF,  p-value: 0.5562
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Particle")))
## 
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == 
##     "Particle"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -203.87 -109.15  -44.96   44.29  341.78 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   557.62      50.67  11.006 6.56e-07 ***
## mpd.obs.z     -33.22     140.62  -0.236    0.818    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 175 on 10 degrees of freedom
## Multiple R-squared:  0.00555,    Adjusted R-squared:  -0.0939 
## F-statistic: 0.05581 on 1 and 10 DF,  p-value: 0.818
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Free")))
## 
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == 
##     "Free"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -107.76  -48.80  -17.85   56.29  159.66 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   342.48      24.62  13.913 7.19e-08 ***
## mpd.obs.z      74.88      62.79   1.193    0.261    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 84.48 on 10 degrees of freedom
## Multiple R-squared:  0.1245, Adjusted R-squared:  0.03699 
## F-statistic: 1.422 on 1 and 10 DF,  p-value: 0.2605
ggplot(df_2, aes(y = ntaxa, x = mpd.obs.z, fill = fraction)) +
  geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
  geom_point(size = 3, shape = 21) + ylab("Randomized Richness") +
  xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
  scale_fill_manual(values = fraction_colors) +
  scale_x_continuous(limits = c(-1,1)) + 
  theme(legend.title = element_blank(), legend.position = c(0.15, 0.9))